{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       "/* 本电子讲义使用之CSS */\n",
       "div.code_cell {\n",
       "    background-color: #e5f1fe;\n",
       "}\n",
       "div.cell.selected {\n",
       "    background-color: #effee2;\n",
       "    font-size: 2rem;\n",
       "    line-height: 2.4rem;\n",
       "}\n",
       "div.cell.selected .rendered_html table {\n",
       "    font-size: 2rem !important;\n",
       "    line-height: 2.4rem !important;\n",
       "}\n",
       ".rendered_html pre code {\n",
       "    background-color: #C4E4ff;   \n",
       "    padding: 2px 25px;\n",
       "}\n",
       ".rendered_html pre {\n",
       "    background-color: #99c9ff;\n",
       "}\n",
       "div.code_cell .CodeMirror {\n",
       "    font-size: 2rem !important;\n",
       "    line-height: 2.4rem !important;\n",
       "}\n",
       ".rendered_html img, .rendered_html svg {\n",
       "    max-width: 60%;\n",
       "    height: auto;\n",
       "    float: right;\n",
       "}\n",
       "\n",
       ".rendered_html img[src*=\"#full\"], .rendered_html svg[src*=\"#full\"] {\n",
       "    max-width: 100%;\n",
       "    height: auto;\n",
       "    float: none;\n",
       "}\n",
       "\n",
       ".rendered_html img[src*=\"#thumbnail\"], .rendered_html svg[src*=\"#thumbnail\"] {\n",
       "    max-width: 15%;\n",
       "    height: auto;\n",
       "}\n",
       "\n",
       "/* Gradient transparent - color - transparent */\n",
       "hr {\n",
       "    border: 0;\n",
       "    border-bottom: 1px dashed #ccc;\n",
       "}\n",
       ".emoticon{\n",
       "    font-size: 5rem;\n",
       "    line-height: 4.4rem;\n",
       "    text-align: center;\n",
       "    vertical-align: middle;\n",
       "}\n",
       ".bg-split_apply_comine {\n",
       "    width: 500px;     \n",
       "    height: 300px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -10px -10px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-comine {\n",
       "    width: 175px;\n",
       "    height: 150px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -280px -80px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-apply {\n",
       "    width: 155px;\n",
       "    height: 225px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -160px -30px;\n",
       "    float: right;\n",
       "}\n",
       ".bg-split {\n",
       "    width: 205px;\n",
       "    height: 225px;\n",
       "    background: url('02_split-apply-comine_500x300.png') -10px -30px;\n",
       "    float: right;\n",
       "}\n",
       ".break {\n",
       "                   page-break-after: right; \n",
       "                   width:700px;\n",
       "                   clear:both;\n",
       "}\n",
       "</style>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%html\n",
    "<style>\n",
    "/* CSS used by this electronic lecture handout */\n",
    "div.code_cell {\n",
    "    background-color: #e5f1fe;\n",
    "}\n",
    "div.cell.selected {\n",
    "    background-color: #effee2;\n",
    "    font-size: 2rem;\n",
    "    line-height: 2.4rem;\n",
    "}\n",
    "div.cell.selected .rendered_html table {\n",
    "    font-size: 2rem !important;\n",
    "    line-height: 2.4rem !important;\n",
    "}\n",
    ".rendered_html pre code {\n",
    "    background-color: #C4E4ff;   \n",
    "    padding: 2px 25px;\n",
    "}\n",
    ".rendered_html pre {\n",
    "    background-color: #99c9ff;\n",
    "}\n",
    "div.code_cell .CodeMirror {\n",
    "    font-size: 2rem !important;\n",
    "    line-height: 2.4rem !important;\n",
    "}\n",
    ".rendered_html img, .rendered_html svg {\n",
    "    max-width: 60%;\n",
    "    height: auto;\n",
    "    float: right;\n",
    "}\n",
    "\n",
    ".rendered_html img[src*=\"#full\"], .rendered_html svg[src*=\"#full\"] {\n",
    "    max-width: 100%;\n",
    "    height: auto;\n",
    "    float: none;\n",
    "}\n",
    "\n",
    ".rendered_html img[src*=\"#thumbnail\"], .rendered_html svg[src*=\"#thumbnail\"] {\n",
    "    max-width: 15%;\n",
    "    height: auto;\n",
    "}\n",
    "\n",
    "/* Horizontal rule: drop the default border and draw a dashed light-gray bottom line */\n",
    "hr {\n",
    "    border: 0;\n",
    "    border-bottom: 1px dashed #ccc;\n",
    "}\n",
    ".emoticon{\n",
    "    font-size: 5rem;\n",
    "    line-height: 4.4rem;\n",
    "    text-align: center;\n",
    "    vertical-align: middle;\n",
    "}\n",
    ".bg-split_apply_comine {\n",
    "    width: 500px;     \n",
    "    height: 300px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -10px -10px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-comine {\n",
    "    width: 175px;\n",
    "    height: 150px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -280px -80px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-apply {\n",
    "    width: 155px;\n",
    "    height: 225px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -160px -30px;\n",
    "    float: right;\n",
    "}\n",
    ".bg-split {\n",
    "    width: 205px;\n",
    "    height: 225px;\n",
    "    background: url('02_split-apply-comine_500x300.png') -10px -30px;\n",
    "    float: right;\n",
    "}\n",
    ".break {\n",
    "                   page-break-after: right; \n",
    "                   width:700px;\n",
    "                   clear:both;\n",
    "}\n",
    "</style>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Core modules\n",
    "import pandas as pd\n",
    "from requests_html import HTMLSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[60, 60, 60, 60, 60, 60, 60, 60]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "      <th>薪水</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>时间</th>\n",
       "      <th>经验</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>旅游产品经理</td>\n",
       "      <td>12-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>前海爱讯科技(深圳)有限公司</td>\n",
       "      <td>19小时前</td>\n",
       "      <td>2年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1926703515.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8972310/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>教育科技 软件产品经理</td>\n",
       "      <td>12-18k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>融捷投资控股集团</td>\n",
       "      <td>昨天</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1922705123.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8025674/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>12-18k·12薪</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>广州大白互联网科技有限公司</td>\n",
       "      <td>昨天</td>\n",
       "      <td>2年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1922402715.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8695948/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>实施经理</td>\n",
       "      <td>16-23k·12薪</td>\n",
       "      <td>广州-大沙</td>\n",
       "      <td>广东卓志供应链服务集团有限公司</td>\n",
       "      <td>2020-03-23</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1924985573.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9238204/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>互联网产品经理</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>广州-琶洲</td>\n",
       "      <td>广东车海洋环保科技有限公司</td>\n",
       "      <td>2020-03-20</td>\n",
       "      <td>3年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/job/1917453193.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9256869/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>后台产品经理</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>广东南方新媒体股份有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1925126353.shtml</td>\n",
       "      <td>https://m.liepin.com/company/7889168/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>区块链产品经理</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州-黄埔区</td>\n",
       "      <td>北京普瑞未来教育科技集团有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>3年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/job/1919835727.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9989029/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>高级产品经理</td>\n",
       "      <td>20-25k·13薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>某软件开发企业</td>\n",
       "      <td>昨天</td>\n",
       "      <td>3年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/a/18948933.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>产品经理（电商系统）</td>\n",
       "      <td>25-40k·14薪</td>\n",
       "      <td>广东,深圳,广州</td>\n",
       "      <td>知名跨境电商公司</td>\n",
       "      <td>昨天</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/18705133.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>WMS产品经理</td>\n",
       "      <td>20-35k·14薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>某知名跨境电商平台</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>2年以上 学历不限</td>\n",
       "      <td>https://m.liepin.com/a/18963147.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>产品经理（支付/后端）</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>北京路客互联网科技有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1917750895.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9284656/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>产品总监</td>\n",
       "      <td>50-70k·13薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>名创优品</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>8年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1925389277.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8392675/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>产品专员</td>\n",
       "      <td>5-8k·12薪</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>广州三易互联网科技有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>经验不限 学历不限</td>\n",
       "      <td>https://m.liepin.com/job/1922364281.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9647941/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>产品助理</td>\n",
       "      <td>5-8k·13薪</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>广州三易互联网科技有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>经验不限 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1922356557.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9647941/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>广州易达建信科技开发有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>1年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/job/1919464529.shtml</td>\n",
       "      <td>https://m.liepin.com/company/5493174/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>14-22k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>锦江信息技术(广州)有限公司</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>5年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1919024715.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8973053/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>供应链产品经理</td>\n",
       "      <td>10-23k·12薪</td>\n",
       "      <td>广州-黄埔区</td>\n",
       "      <td>健客网</td>\n",
       "      <td>一个月前</td>\n",
       "      <td>2年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1914662183.shtml</td>\n",
       "      <td>https://m.liepin.com/company/582047/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>青木数字技术股份有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1927082439.shtml</td>\n",
       "      <td>https://m.liepin.com/company/12191983/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>产品经理（校园招聘）</td>\n",
       "      <td>8-12k·13薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>经验不限 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1927075137.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>WXG03-微信公众号小程序生活服务行业产品经理（广州）</td>\n",
       "      <td>面议</td>\n",
       "      <td>广州</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>11分钟前</td>\n",
       "      <td>2年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1927010729.shtml</td>\n",
       "      <td>https://m.liepin.com/company/7983148/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>高级业务架构师（数字化新零售）</td>\n",
       "      <td>面议</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>广州滴普科技有限公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>8年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926800719.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10166945/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>产品经理（智能终端产品）</td>\n",
       "      <td>面议</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1926797053.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>20-40k·12薪</td>\n",
       "      <td>广州-番禺区</td>\n",
       "      <td>广州探迹科技有限公司</td>\n",
       "      <td>35分钟前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1926723287.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8836020/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>产品经理（营收）</td>\n",
       "      <td>20-30k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>经验不限 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926712533.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>产品经理-内容优化方向</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>经验不限 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926699881.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>平台SDK产品经理</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>经验不限 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926699879.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>账号产品经理</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926647497.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>产品经理（电商/社交方向）</td>\n",
       "      <td>8-15k·13薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>广东人民出版社有限公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926576913.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10012691/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>资深产品经理（相机产品）</td>\n",
       "      <td>面议</td>\n",
       "      <td>广州</td>\n",
       "      <td>网易集团</td>\n",
       "      <td>3小时前</td>\n",
       "      <td>5年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926534703.shtml</td>\n",
       "      <td>https://m.liepin.com/company/5964833/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>产品经理（用户体验改善）</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>嘟比英语</td>\n",
       "      <td>6分钟前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1926412121.shtml</td>\n",
       "      <td>https://m.liepin.com/company/12166375/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>15-20k·13薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>广州诚迈信息科技有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1926106673.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10063493/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>高级产品经理</td>\n",
       "      <td>25-35k·15薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>5年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1925922019.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>产品专员</td>\n",
       "      <td>6-10k·15薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>1年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1925921709.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>高级产品经理(J10274)</td>\n",
       "      <td>14-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>广州金鹏集团有限公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 大专及以上</td>\n",
       "      <td>https://m.liepin.com/job/1925674943.shtml</td>\n",
       "      <td>https://m.liepin.com/company/7999640/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>ATS需求分析师</td>\n",
       "      <td>面议</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1925556345.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>产品经理（临床科研）</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>健康互联(广州)信息科技股份有限公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>2年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1925540179.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10087541/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>产品经理-供应链金融</td>\n",
       "      <td>15-30k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>TCL金融</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1925519307.shtml</td>\n",
       "      <td>https://m.liepin.com/company/7876336/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>直播产品经理</td>\n",
       "      <td>15-25k·15薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924987385.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>产品经理（全自动运行方向）</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924819589.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>产品经理（节能控制方向）</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924819521.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>产品经理（智能运维方向）</td>\n",
       "      <td>10-20k·14薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1924549497.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>产品经理（数字孪生方向）</td>\n",
       "      <td>10-20k·14薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>佳都新太科技</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1924549389.shtml</td>\n",
       "      <td>https://m.liepin.com/company/2115085/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>产品经理</td>\n",
       "      <td>8-10k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>广东高乐教育科技有限公司</td>\n",
       "      <td>51分钟前</td>\n",
       "      <td>2年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924467355.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10156263/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>数据产品经理</td>\n",
       "      <td>18-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>广州共享出行科技有限公司</td>\n",
       "      <td>25分钟前</td>\n",
       "      <td>5年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924261327.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10118191/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>直播营收产品运营经理</td>\n",
       "      <td>20-30k·15薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海翡翠东方网络信息技术有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1924139323.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9947855/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>产品策划经理</td>\n",
       "      <td>8-12k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>广东高乐教育科技有限公司</td>\n",
       "      <td>51分钟前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/job/1922519235.shtml</td>\n",
       "      <td>https://m.liepin.com/company/10156263/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>产品经理（跨境业务）</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>易联支付</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1921011983.shtml</td>\n",
       "      <td>https://m.liepin.com/company/8845617/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>内衣项目总经理 - 跨境电商出口行业</td>\n",
       "      <td>20-25k·12薪</td>\n",
       "      <td>广州-越秀区</td>\n",
       "      <td>文峰荟(广州)创业投资管理有限公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>10年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/job/1920988285.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9412628/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>产品经理（OA）</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>佛山市艾臣家居科技有限公司</td>\n",
       "      <td>3小时前</td>\n",
       "      <td>3年以上 学历不限</td>\n",
       "      <td>https://m.liepin.com/job/1919955237.shtml</td>\n",
       "      <td>https://m.liepin.com/company/9220328/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>资深产品</td>\n",
       "      <td>25-45k·16薪</td>\n",
       "      <td>广州,杭州</td>\n",
       "      <td>知名互联网公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19561009.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>APP产品经理（知识付费/在线教育）</td>\n",
       "      <td>15-25k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>知识付费在线教育APP</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>2年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19548311.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>APP产品经理（知识付费/在线教育）</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>知识付费在线教育APP</td>\n",
       "      <td>41分钟前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/a/19483901.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>国际支付产品经理</td>\n",
       "      <td>25-50k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>知名金融科技企业</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19439035.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>crm高级产品经理</td>\n",
       "      <td>25-50k·14薪</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>国内知少儿绘画教育平台</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19432331.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>产品总监</td>\n",
       "      <td>80-110k·15薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>某世界100强高科技公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>5年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19427061.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>数据中台产品经理</td>\n",
       "      <td>20-40k·16薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>某大型游戏公司</td>\n",
       "      <td>1小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/a/19399851.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>商家履约产品经理</td>\n",
       "      <td>25-50k·18薪</td>\n",
       "      <td>广东,广州,杭州</td>\n",
       "      <td>某大型知名跨境电商平台准独角兽（C+轮）</td>\n",
       "      <td>45分钟前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19371747.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>WMS产品经理</td>\n",
       "      <td>25-50k·18薪</td>\n",
       "      <td>广东,广州,杭州</td>\n",
       "      <td>某大型知名跨境电商平台准独角兽（C+轮）</td>\n",
       "      <td>45分钟前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19370965.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>资深产品经理（后台系统）</td>\n",
       "      <td>20-40k·12薪</td>\n",
       "      <td>广州</td>\n",
       "      <td>国内大型电商公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 本科及以上</td>\n",
       "      <td>https://m.liepin.com/a/19250159.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>wms产品经理</td>\n",
       "      <td>25-50k·14薪</td>\n",
       "      <td>广州-海珠区,杭州-西湖区</td>\n",
       "      <td>某快速发展的电商公司</td>\n",
       "      <td>2小时前</td>\n",
       "      <td>3年以上 统招本科</td>\n",
       "      <td>https://m.liepin.com/a/19151205.shtml</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               职称           薪水           公司地点  \\\n",
       "0                         旅游产品经理    12-20k·12薪             广州   \n",
       "1                    教育科技 软件产品经理    12-18k·12薪             广州   \n",
       "2                           产品经理    12-18k·12薪         广州-海珠区   \n",
       "3                           实施经理    16-23k·12薪          广州-大沙   \n",
       "4                        互联网产品经理    10-15k·12薪          广州-琶洲   \n",
       "5                         后台产品经理    10-20k·12薪             广州   \n",
       "6                        区块链产品经理    15-25k·12薪         广州-黄埔区   \n",
       "7                         高级产品经理    20-25k·13薪             广州   \n",
       "8                     产品经理（电商系统）    25-40k·14薪       广东,深圳,广州   \n",
       "9                        WMS产品经理    20-35k·14薪             广州   \n",
       "10                   产品经理（支付/后端）    10-20k·12薪         广州-海珠区   \n",
       "11                          产品总监    50-70k·13薪             广州   \n",
       "12                          产品专员      5-8k·12薪         广州-海珠区   \n",
       "13                          产品助理      5-8k·13薪         广州-海珠区   \n",
       "14                          产品经理    10-20k·12薪         广州-天河区   \n",
       "15                          产品经理    14-22k·12薪             广州   \n",
       "16                       供应链产品经理    10-23k·12薪         广州-黄埔区   \n",
       "17                          产品经理    15-25k·12薪         广州-海珠区   \n",
       "18                    产品经理（校园招聘）     8-12k·13薪             广州   \n",
       "19  WXG03-微信公众号小程序生活服务行业产品经理（广州）            面议             广州   \n",
       "20               高级业务架构师（数字化新零售）            面议         广州-海珠区   \n",
       "21                  产品经理（智能终端产品）            面议             广州   \n",
       "22                          产品经理    20-40k·12薪         广州-番禺区   \n",
       "23                      产品经理（营收）    20-30k·12薪             广州   \n",
       "24                   产品经理-内容优化方向    15-25k·12薪             广州   \n",
       "25                     平台SDK产品经理    15-25k·12薪             广州   \n",
       "26                        账号产品经理    15-25k·12薪             广州   \n",
       "27                 产品经理（电商/社交方向）     8-15k·13薪             广州   \n",
       "28                  资深产品经理（相机产品）            面议             广州   \n",
       "29                  产品经理（用户体验改善）    15-25k·12薪         广州-天河区   \n",
       "30                          产品经理    15-20k·13薪             广州   \n",
       "31                        高级产品经理    25-35k·15薪         广州-天河区   \n",
       "32                          产品专员     6-10k·15薪         广州-天河区   \n",
       "33                高级产品经理(J10274)    14-20k·12薪             广州   \n",
       "34                      ATS需求分析师            面议             广州   \n",
       "35                    产品经理（临床科研）    15-25k·12薪             广州   \n",
       "36                    产品经理-供应链金融    15-30k·12薪         广州-天河区   \n",
       "37                        直播产品经理    15-25k·15薪         广州-天河区   \n",
       "38                 产品经理（全自动运行方向）    10-20k·12薪             广州   \n",
       "39                  产品经理（节能控制方向）    10-20k·12薪             广州   \n",
       "40                  产品经理（智能运维方向）    10-20k·14薪             广州   \n",
       "41                  产品经理（数字孪生方向）    10-20k·14薪             广州   \n",
       "42                          产品经理     8-10k·12薪         广州-天河区   \n",
       "43                        数据产品经理    18-25k·12薪             广州   \n",
       "44                    直播营收产品运营经理    20-30k·15薪         广州-天河区   \n",
       "45                        产品策划经理     8-12k·12薪         广州-天河区   \n",
       "46                    产品经理（跨境业务）    10-20k·12薪             广州   \n",
       "47            内衣项目总经理 - 跨境电商出口行业    20-25k·12薪         广州-越秀区   \n",
       "48                      产品经理（OA）    10-15k·12薪         广州-天河区   \n",
       "49                          资深产品    25-45k·16薪          广州,杭州   \n",
       "50            APP产品经理（知识付费/在线教育）    15-25k·12薪             广州   \n",
       "51            APP产品经理（知识付费/在线教育）    10-15k·12薪         广州-天河区   \n",
       "52                      国际支付产品经理    25-50k·12薪             广州   \n",
       "53                     crm高级产品经理    25-50k·14薪         广州-天河区   \n",
       "54                          产品总监   80-110k·15薪             广州   \n",
       "55                      数据中台产品经理    20-40k·16薪             广州   \n",
       "56                      商家履约产品经理    25-50k·18薪       广东,广州,杭州   \n",
       "57                       WMS产品经理    25-50k·18薪       广东,广州,杭州   \n",
       "58                  资深产品经理（后台系统）    20-40k·12薪             广州   \n",
       "59                       wms产品经理    25-50k·14薪  广州-海珠区,杭州-西湖区   \n",
       "\n",
       "                    公司名称          时间          经验  \\\n",
       "0         前海爱讯科技(深圳)有限公司       19小时前   2年以上 统招本科   \n",
       "1               融捷投资控股集团          昨天   3年以上 统招本科   \n",
       "2          广州大白互联网科技有限公司          昨天  2年以上 本科及以上   \n",
       "3        广东卓志供应链服务集团有限公司  2020-03-23   5年以上 统招本科   \n",
       "4          广东车海洋环保科技有限公司  2020-03-20  3年以上 大专及以上   \n",
       "5          广东南方新媒体股份有限公司        一个月前  3年以上 本科及以上   \n",
       "6       北京普瑞未来教育科技集团有限公司        一个月前  3年以上 大专及以上   \n",
       "7                某软件开发企业          昨天  3年以上 大专及以上   \n",
       "8               知名跨境电商公司          昨天   3年以上 统招本科   \n",
       "9              某知名跨境电商平台        一个月前   2年以上 学历不限   \n",
       "10         北京路客互联网科技有限公司        一个月前  3年以上 本科及以上   \n",
       "11                  名创优品        一个月前   8年以上 统招本科   \n",
       "12         广州三易互联网科技有限公司        一个月前   经验不限 学历不限   \n",
       "13         广州三易互联网科技有限公司        一个月前  经验不限 本科及以上   \n",
       "14        广州易达建信科技开发有限公司        一个月前  1年以上 大专及以上   \n",
       "15        锦江信息技术(广州)有限公司        一个月前  5年以上 本科及以上   \n",
       "16                   健客网        一个月前  2年以上 本科及以上   \n",
       "17          青木数字技术股份有限公司        1小时前   5年以上 统招本科   \n",
       "18                佳都新太科技        2小时前   经验不限 统招本科   \n",
       "19                    腾讯       11分钟前  2年以上 本科及以上   \n",
       "20            广州滴普科技有限公司        2小时前  8年以上 本科及以上   \n",
       "21                佳都新太科技        2小时前   3年以上 统招本科   \n",
       "22            广州探迹科技有限公司       35分钟前   3年以上 统招本科   \n",
       "23      上海翡翠东方网络信息技术有限公司        1小时前  经验不限 本科及以上   \n",
       "24      上海翡翠东方网络信息技术有限公司        1小时前  经验不限 本科及以上   \n",
       "25      上海翡翠东方网络信息技术有限公司        1小时前  经验不限 本科及以上   \n",
       "26      上海翡翠东方网络信息技术有限公司        1小时前  3年以上 本科及以上   \n",
       "27           广东人民出版社有限公司        2小时前  3年以上 大专及以上   \n",
       "28                  网易集团        3小时前  5年以上 本科及以上   \n",
       "29                  嘟比英语        6分钟前   3年以上 统招本科   \n",
       "30          广州诚迈信息科技有限公司        1小时前  3年以上 本科及以上   \n",
       "31      上海翡翠东方网络信息技术有限公司        1小时前  5年以上 本科及以上   \n",
       "32      上海翡翠东方网络信息技术有限公司        1小时前  1年以上 本科及以上   \n",
       "33            广州金鹏集团有限公司        2小时前  3年以上 大专及以上   \n",
       "34                佳都新太科技        2小时前   3年以上 统招本科   \n",
       "35    健康互联(广州)信息科技股份有限公司        2小时前  2年以上 本科及以上   \n",
       "36                 TCL金融        2小时前   3年以上 统招本科   \n",
       "37      上海翡翠东方网络信息技术有限公司        1小时前  3年以上 本科及以上   \n",
       "38                佳都新太科技        2小时前  3年以上 本科及以上   \n",
       "39                佳都新太科技        2小时前  3年以上 本科及以上   \n",
       "40                佳都新太科技        2小时前   5年以上 统招本科   \n",
       "41                佳都新太科技        2小时前   5年以上 统招本科   \n",
       "42          广东高乐教育科技有限公司       51分钟前  2年以上 本科及以上   \n",
       "43          广州共享出行科技有限公司       25分钟前  5年以上 本科及以上   \n",
       "44      上海翡翠东方网络信息技术有限公司        1小时前  3年以上 本科及以上   \n",
       "45          广东高乐教育科技有限公司       51分钟前  3年以上 本科及以上   \n",
       "46                  易联支付        1小时前   3年以上 统招本科   \n",
       "47     文峰荟(广州)创业投资管理有限公司        1小时前  10年以上 统招本科   \n",
       "48         佛山市艾臣家居科技有限公司        3小时前   3年以上 学历不限   \n",
       "49               知名互联网公司        1小时前   5年以上 统招本科   \n",
       "50           知识付费在线教育APP        1小时前   2年以上 统招本科   \n",
       "51           知识付费在线教育APP       41分钟前  3年以上 本科及以上   \n",
       "52              知名金融科技企业        2小时前   3年以上 统招本科   \n",
       "53           国内知少儿绘画教育平台        1小时前   5年以上 统招本科   \n",
       "54          某世界100强高科技公司        2小时前   5年以上 统招本科   \n",
       "55               某大型游戏公司        1小时前  3年以上 本科及以上   \n",
       "56  某大型知名跨境电商平台准独角兽（C+轮）       45分钟前   3年以上 统招本科   \n",
       "57  某大型知名跨境电商平台准独角兽（C+轮）       45分钟前   3年以上 统招本科   \n",
       "58              国内大型电商公司        2小时前  3年以上 本科及以上   \n",
       "59            某快速发展的电商公司        2小时前   3年以上 统招本科   \n",
       "\n",
       "                                           链结  \\\n",
       "0   https://m.liepin.com/job/1926703515.shtml   \n",
       "1   https://m.liepin.com/job/1922705123.shtml   \n",
       "2   https://m.liepin.com/job/1922402715.shtml   \n",
       "3   https://m.liepin.com/job/1924985573.shtml   \n",
       "4   https://m.liepin.com/job/1917453193.shtml   \n",
       "5   https://m.liepin.com/job/1925126353.shtml   \n",
       "6   https://m.liepin.com/job/1919835727.shtml   \n",
       "7       https://m.liepin.com/a/18948933.shtml   \n",
       "8       https://m.liepin.com/a/18705133.shtml   \n",
       "9       https://m.liepin.com/a/18963147.shtml   \n",
       "10  https://m.liepin.com/job/1917750895.shtml   \n",
       "11  https://m.liepin.com/job/1925389277.shtml   \n",
       "12  https://m.liepin.com/job/1922364281.shtml   \n",
       "13  https://m.liepin.com/job/1922356557.shtml   \n",
       "14  https://m.liepin.com/job/1919464529.shtml   \n",
       "15  https://m.liepin.com/job/1919024715.shtml   \n",
       "16  https://m.liepin.com/job/1914662183.shtml   \n",
       "17  https://m.liepin.com/job/1927082439.shtml   \n",
       "18  https://m.liepin.com/job/1927075137.shtml   \n",
       "19  https://m.liepin.com/job/1927010729.shtml   \n",
       "20  https://m.liepin.com/job/1926800719.shtml   \n",
       "21  https://m.liepin.com/job/1926797053.shtml   \n",
       "22  https://m.liepin.com/job/1926723287.shtml   \n",
       "23  https://m.liepin.com/job/1926712533.shtml   \n",
       "24  https://m.liepin.com/job/1926699881.shtml   \n",
       "25  https://m.liepin.com/job/1926699879.shtml   \n",
       "26  https://m.liepin.com/job/1926647497.shtml   \n",
       "27  https://m.liepin.com/job/1926576913.shtml   \n",
       "28  https://m.liepin.com/job/1926534703.shtml   \n",
       "29  https://m.liepin.com/job/1926412121.shtml   \n",
       "30  https://m.liepin.com/job/1926106673.shtml   \n",
       "31  https://m.liepin.com/job/1925922019.shtml   \n",
       "32  https://m.liepin.com/job/1925921709.shtml   \n",
       "33  https://m.liepin.com/job/1925674943.shtml   \n",
       "34  https://m.liepin.com/job/1925556345.shtml   \n",
       "35  https://m.liepin.com/job/1925540179.shtml   \n",
       "36  https://m.liepin.com/job/1925519307.shtml   \n",
       "37  https://m.liepin.com/job/1924987385.shtml   \n",
       "38  https://m.liepin.com/job/1924819589.shtml   \n",
       "39  https://m.liepin.com/job/1924819521.shtml   \n",
       "40  https://m.liepin.com/job/1924549497.shtml   \n",
       "41  https://m.liepin.com/job/1924549389.shtml   \n",
       "42  https://m.liepin.com/job/1924467355.shtml   \n",
       "43  https://m.liepin.com/job/1924261327.shtml   \n",
       "44  https://m.liepin.com/job/1924139323.shtml   \n",
       "45  https://m.liepin.com/job/1922519235.shtml   \n",
       "46  https://m.liepin.com/job/1921011983.shtml   \n",
       "47  https://m.liepin.com/job/1920988285.shtml   \n",
       "48  https://m.liepin.com/job/1919955237.shtml   \n",
       "49      https://m.liepin.com/a/19561009.shtml   \n",
       "50      https://m.liepin.com/a/19548311.shtml   \n",
       "51      https://m.liepin.com/a/19483901.shtml   \n",
       "52      https://m.liepin.com/a/19439035.shtml   \n",
       "53      https://m.liepin.com/a/19432331.shtml   \n",
       "54      https://m.liepin.com/a/19427061.shtml   \n",
       "55      https://m.liepin.com/a/19399851.shtml   \n",
       "56      https://m.liepin.com/a/19371747.shtml   \n",
       "57      https://m.liepin.com/a/19370965.shtml   \n",
       "58      https://m.liepin.com/a/19250159.shtml   \n",
       "59      https://m.liepin.com/a/19151205.shtml   \n",
       "\n",
       "                                     公司URL  \n",
       "0    https://m.liepin.com/company/8972310/  \n",
       "1    https://m.liepin.com/company/8025674/  \n",
       "2    https://m.liepin.com/company/8695948/  \n",
       "3    https://m.liepin.com/company/9238204/  \n",
       "4    https://m.liepin.com/company/9256869/  \n",
       "5    https://m.liepin.com/company/7889168/  \n",
       "6    https://m.liepin.com/company/9989029/  \n",
       "7                                           \n",
       "8                                           \n",
       "9                                           \n",
       "10   https://m.liepin.com/company/9284656/  \n",
       "11   https://m.liepin.com/company/8392675/  \n",
       "12   https://m.liepin.com/company/9647941/  \n",
       "13   https://m.liepin.com/company/9647941/  \n",
       "14   https://m.liepin.com/company/5493174/  \n",
       "15   https://m.liepin.com/company/8973053/  \n",
       "16    https://m.liepin.com/company/582047/  \n",
       "17  https://m.liepin.com/company/12191983/  \n",
       "18   https://m.liepin.com/company/2115085/  \n",
       "19   https://m.liepin.com/company/7983148/  \n",
       "20  https://m.liepin.com/company/10166945/  \n",
       "21   https://m.liepin.com/company/2115085/  \n",
       "22   https://m.liepin.com/company/8836020/  \n",
       "23   https://m.liepin.com/company/9947855/  \n",
       "24   https://m.liepin.com/company/9947855/  \n",
       "25   https://m.liepin.com/company/9947855/  \n",
       "26   https://m.liepin.com/company/9947855/  \n",
       "27  https://m.liepin.com/company/10012691/  \n",
       "28   https://m.liepin.com/company/5964833/  \n",
       "29  https://m.liepin.com/company/12166375/  \n",
       "30  https://m.liepin.com/company/10063493/  \n",
       "31   https://m.liepin.com/company/9947855/  \n",
       "32   https://m.liepin.com/company/9947855/  \n",
       "33   https://m.liepin.com/company/7999640/  \n",
       "34   https://m.liepin.com/company/2115085/  \n",
       "35  https://m.liepin.com/company/10087541/  \n",
       "36   https://m.liepin.com/company/7876336/  \n",
       "37   https://m.liepin.com/company/9947855/  \n",
       "38   https://m.liepin.com/company/2115085/  \n",
       "39   https://m.liepin.com/company/2115085/  \n",
       "40   https://m.liepin.com/company/2115085/  \n",
       "41   https://m.liepin.com/company/2115085/  \n",
       "42  https://m.liepin.com/company/10156263/  \n",
       "43  https://m.liepin.com/company/10118191/  \n",
       "44   https://m.liepin.com/company/9947855/  \n",
       "45  https://m.liepin.com/company/10156263/  \n",
       "46   https://m.liepin.com/company/8845617/  \n",
       "47   https://m.liepin.com/company/9412628/  \n",
       "48   https://m.liepin.com/company/9220328/  \n",
       "49                                          \n",
       "50                                          \n",
       "51                                          \n",
       "52                                          \n",
       "53                                          \n",
       "54                                          \n",
       "55                                          \n",
       "56                                          \n",
       "57                                          \n",
       "58                                          \n",
       "59                                          "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://m.liepin.com/zhaopin/?keyword=PRD\"\n",
    "session = HTMLSession()\n",
    "r = session.get( url )\n",
    "\n",
    "主要元素 = r.html.xpath( \\\n",
    "    '//div[@class=\"job-card-wrap\"]//div[@class=\"job-card\"]')\n",
    "\n",
    "dict_xpaths={ \n",
    "    'text': {\n",
    "        '经验':      './/ul/li[time]/text()'\n",
    "    },\n",
    "    'text_content': {\n",
    "        '职称':    './/ul/li/a[contains(@class,\"job-name\")]/span[@class=\"name-text\"]', \n",
    "        '薪水':    './/ul/li/a[contains(@class,\"job-name\")]/following-sibling::span', \n",
    "        '公司地点':'.//ul/li/time/following-sibling::a',\n",
    "        '公司名称': './/ul/li/a[contains(@class,\"company-name\")]', \n",
    "        '时间':    './/ul/li/time', \n",
    "    },\n",
    "    'href': {\n",
    "        '链结':    './/ul/li/a[contains(@class,\"job-name\")]', \n",
    "        '公司URL': './/ul/li/a[contains(@class,\"company-name\")]', \n",
    "    }\n",
    "}\n",
    "\n",
    "def get_e_text_content(_xpath_):\n",
    "    # 高级列表推导\n",
    "    暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "    return(暂存结果)\n",
    "\n",
    "def get_e_text(_xpath_):\n",
    "    # 高级列表推导\n",
    "    暂存结果 = [\"\".join([x.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "    return(暂存结果)\n",
    "\n",
    "def get_e_href(_xpath_):\n",
    "    # 高级列表推导\n",
    "    暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "               if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "               else \"\" for e in 主要元素]\n",
    "    return(暂存结果)\n",
    "\n",
    "数据字典 = dict()\n",
    "\n",
    "数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "print ([len(v) for k,v in 数据字典.items()])  # 檢查\n",
    "\n",
    "数据 = pd.DataFrame(数据字典)\n",
    "数据.to_excel(\"20春_Web数据挖掘_week02_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "数据 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'职称': ['旅游产品经理 ', '教育科技 软件产品经理 ', '产品经理 ', '实施经理 ', '互联网产品经理 ', '后台产品经理 ', '区块链产品经理 ', '高级产品经理 ', '产品经理（电商系统） ', 'WMS产品经理 ', '产品经理（支付/后端） ', '产品总监 ', '产品专员 ', '产品助理 ', '产品经理 ', '产品经理 ', '供应链产品经理 ', '产品经理 ', '产品经理（校园招聘） ', 'WXG03-微信公众号小程序生活服务行业产品经理（广州） ', '高级业务架构师（数字化新零售） ', '产品经理（智能终端产品） ', '产品经理 ', '产品经理（营收） ', '产品经理-内容优化方向 ', '平台SDK产品经理 ', '账号产品经理 ', '产品经理（电商/社交方向） ', '资深产品经理（相机产品） ', '产品经理（用户体验改善） ', '产品经理 ', '高级产品经理 ', '产品专员 ', '高级产品经理(J10274) ', 'ATS需求分析师 ', '产品经理（临床科研） ', '产品经理-供应链金融 ', '直播产品经理 ', '产品经理（全自动运行方向） ', '产品经理（节能控制方向） ', '产品经理（智能运维方向） ', '产品经理（数字孪生方向） ', '产品经理 ', '数据产品经理 ', '直播营收产品运营经理 ', '产品策划经理 ', '产品经理（跨境业务） ', '内衣项目总经理 - 跨境电商出口行业 ', '产品经理（OA） ', '资深产品 ', 'APP产品经理（知识付费/在线教育） ', 'APP产品经理（知识付费/在线教育） ', '国际支付产品经理 ', 'crm高级产品经理 ', '产品总监 ', '数据中台产品经理 ', '商家履约产品经理 ', 'WMS产品经理 ', '资深产品经理（后台系统） ', 'wms产品经理 '], '薪水': ['12-20k·12薪', '12-18k·12薪', '12-18k·12薪', '16-23k·12薪', '10-15k·12薪', '10-20k·12薪', '15-25k·12薪', '20-25k·13薪', '25-40k·14薪', '20-35k·14薪', '10-20k·12薪', '50-70k·13薪', '5-8k·12薪', '5-8k·13薪', '10-20k·12薪', '14-22k·12薪', '10-23k·12薪', '15-25k·12薪', '8-12k·13薪', '面议', '面议', '面议', '20-40k·12薪', '20-30k·12薪', '15-25k·12薪', '15-25k·12薪', '15-25k·12薪', '8-15k·13薪', '面议', '15-25k·12薪', '15-20k·13薪', '25-35k·15薪', '6-10k·15薪', '14-20k·12薪', '面议', '15-25k·12薪', '15-30k·12薪', '15-25k·15薪', '10-20k·12薪', '10-20k·12薪', '10-20k·14薪', '10-20k·14薪', '8-10k·12薪', '18-25k·12薪', '20-30k·15薪', '8-12k·12薪', '10-20k·12薪', '20-25k·12薪', '10-15k·12薪', '25-45k·16薪', '15-25k·12薪', '10-15k·12薪', '25-50k·12薪', '25-50k·14薪', '80-110k·15薪', '20-40k·16薪', '25-50k·18薪', '25-50k·18薪', '20-40k·12薪', '25-50k·14薪'], '公司地点': ['广州', '广州', '广州-海珠区', '广州-大沙', '广州-琶洲', '广州', '广州-黄埔区', '广州', '广东,深圳,广州', '广州', '广州-海珠区', '广州', '广州-海珠区', '广州-海珠区', '广州-天河区', '广州', '广州-黄埔区', '广州-海珠区', '广州', '广州', '广州-海珠区', '广州', '广州-番禺区', '广州', '广州', '广州', '广州', '广州', '广州', '广州-天河区', '广州', '广州-天河区', '广州-天河区', '广州', '广州', '广州', '广州-天河区', '广州-天河区', '广州', '广州', '广州', '广州', '广州-天河区', '广州', 
'广州-天河区', '广州-天河区', '广州', '广州-越秀区', '广州-天河区', '广州,杭州', '广州', '广州-天河区', '广州', '广州-天河区', '广州', '广州', '广东,广州,杭州', '广东,广州,杭州', '广州', '广州-海珠区,杭州-西湖区'], '公司名称': ['前海爱讯科技(深圳)有限公司', '融捷投资控股集团', '广州大白互联网科技有限公司', '广东卓志供应链服务集团有限公司', '广东车海洋环保科技有限公司', '广东南方新媒体股份有限公司', '北京普瑞未来教育科技集团有限公司', '某软件开发企业', '知名跨境电商公司', '某知名跨境电商平台', '北京路客互联网科技有限公司', '名创优品', '广州三易互联网科技有限公司', '广州三易互联网科技有限公司', '广州易达建信科技开发有限公司', '锦江信息技术(广州)有限公司', '健客网', '青木数字技术股份有限公司', '佳都新太科技', '腾讯', '广州滴普科技有限公司', '佳都新太科技', '广州探迹科技有限公司', '上海翡翠东方网络信息技术有限公司', '上海翡翠东方网络信息技术有限公司', '上海翡翠东方网络信息技术有限公司', '上海翡翠东方网络信息技术有限公司', '广东人民出版社有限公司', '网易集团', '嘟比英语', '广州诚迈信息科技有限公司', '上海翡翠东方网络信息技术有限公司', '上海翡翠东方网络信息技术有限公司', '广州金鹏集团有限公司', '佳都新太科技', '健康互联(广州)信息科技股份有限公司', 'TCL金融', '上海翡翠东方网络信息技术有限公司', '佳都新太科技', '佳都新太科技', '佳都新太科技', '佳都新太科技', '广东高乐教育科技有限公司', '广州共享出行科技有限公司', '上海翡翠东方网络信息技术有限公司', '广东高乐教育科技有限公司', '易联支付', '文峰荟(广州)创业投资管理有限公司', '佛山市艾臣家居科技有限公司', '知名互联网公司', '知识付费在线教育APP', '知识付费在线教育APP', '知名金融科技企业', '国内知少儿绘画教育平台', '某世界100强高科技公司', '某大型游戏公司', '某大型知名跨境电商平台准独角兽（C+轮）', '某大型知名跨境电商平台准独角兽（C+轮）', '国内大型电商公司', '某快速发展的电商公司'], '时间': ['19小时前', '昨天', '昨天', '2020-03-23', '2020-03-20', '一个月前', '一个月前', '昨天', '昨天', '一个月前', '一个月前', '一个月前', '一个月前', '一个月前', '一个月前', '一个月前', '一个月前', '1小时前', '2小时前', '11分钟前', '2小时前', '2小时前', '35分钟前', '1小时前', '1小时前', '1小时前', '1小时前', '2小时前', '3小时前', '6分钟前', '1小时前', '1小时前', '1小时前', '2小时前', '2小时前', '2小时前', '2小时前', '1小时前', '2小时前', '2小时前', '2小时前', '2小时前', '51分钟前', '25分钟前', '1小时前', '51分钟前', '1小时前', '1小时前', '3小时前', '1小时前', '1小时前', '41分钟前', '2小时前', '1小时前', '2小时前', '1小时前', '45分钟前', '45分钟前', '2小时前', '2小时前'], '经验': ['2年以上 统招本科', '3年以上 统招本科', '2年以上 本科及以上', '5年以上 统招本科', '3年以上 大专及以上', '3年以上 本科及以上', '3年以上 大专及以上', '3年以上 大专及以上', '3年以上 统招本科', '2年以上 学历不限', '3年以上 本科及以上', '8年以上 统招本科', '经验不限 学历不限', '经验不限 本科及以上', '1年以上 大专及以上', '5年以上 本科及以上', '2年以上 本科及以上', '5年以上 统招本科', '经验不限 统招本科', '2年以上 本科及以上', '8年以上 本科及以上', '3年以上 统招本科', '3年以上 统招本科', '经验不限 本科及以上', '经验不限 本科及以上', '经验不限 本科及以上', '3年以上 本科及以上', '3年以上 大专及以上', '5年以上 本科及以上', '3年以上 统招本科', '3年以上 本科及以上', '5年以上 本科及以上', '1年以上 
本科及以上', '3年以上 大专及以上', '3年以上 统招本科', '2年以上 本科及以上', '3年以上 统招本科', '3年以上 本科及以上', '3年以上 本科及以上', '3年以上 本科及以上', '5年以上 统招本科', '5年以上 统招本科', '2年以上 本科及以上', '5年以上 本科及以上', '3年以上 本科及以上', '3年以上 本科及以上', '3年以上 统招本科', '10年以上 统招本科', '3年以上 学历不限', '5年以上 统招本科', '2年以上 统招本科', '3年以上 本科及以上', '3年以上 统招本科', '5年以上 统招本科', '5年以上 统招本科', '3年以上 本科及以上', '3年以上 统招本科', '3年以上 统招本科', '3年以上 本科及以上', '3年以上 统招本科'], '链结': ['https://m.liepin.com/job/1926703515.shtml', 'https://m.liepin.com/job/1922705123.shtml', 'https://m.liepin.com/job/1922402715.shtml', 'https://m.liepin.com/job/1924985573.shtml', 'https://m.liepin.com/job/1917453193.shtml', 'https://m.liepin.com/job/1925126353.shtml', 'https://m.liepin.com/job/1919835727.shtml', 'https://m.liepin.com/a/18948933.shtml', 'https://m.liepin.com/a/18705133.shtml', 'https://m.liepin.com/a/18963147.shtml', 'https://m.liepin.com/job/1917750895.shtml', 'https://m.liepin.com/job/1925389277.shtml', 'https://m.liepin.com/job/1922364281.shtml', 'https://m.liepin.com/job/1922356557.shtml', 'https://m.liepin.com/job/1919464529.shtml', 'https://m.liepin.com/job/1919024715.shtml', 'https://m.liepin.com/job/1914662183.shtml', 'https://m.liepin.com/job/1927082439.shtml', 'https://m.liepin.com/job/1927075137.shtml', 'https://m.liepin.com/job/1927010729.shtml', 'https://m.liepin.com/job/1926800719.shtml', 'https://m.liepin.com/job/1926797053.shtml', 'https://m.liepin.com/job/1926723287.shtml', 'https://m.liepin.com/job/1926712533.shtml', 'https://m.liepin.com/job/1926699881.shtml', 'https://m.liepin.com/job/1926699879.shtml', 'https://m.liepin.com/job/1926647497.shtml', 'https://m.liepin.com/job/1926576913.shtml', 'https://m.liepin.com/job/1926534703.shtml', 'https://m.liepin.com/job/1926412121.shtml', 'https://m.liepin.com/job/1926106673.shtml', 'https://m.liepin.com/job/1925922019.shtml', 'https://m.liepin.com/job/1925921709.shtml', 'https://m.liepin.com/job/1925674943.shtml', 'https://m.liepin.com/job/1925556345.shtml', 'https://m.liepin.com/job/1925540179.shtml', 
'https://m.liepin.com/job/1925519307.shtml', 'https://m.liepin.com/job/1924987385.shtml', 'https://m.liepin.com/job/1924819589.shtml', 'https://m.liepin.com/job/1924819521.shtml', 'https://m.liepin.com/job/1924549497.shtml', 'https://m.liepin.com/job/1924549389.shtml', 'https://m.liepin.com/job/1924467355.shtml', 'https://m.liepin.com/job/1924261327.shtml', 'https://m.liepin.com/job/1924139323.shtml', 'https://m.liepin.com/job/1922519235.shtml', 'https://m.liepin.com/job/1921011983.shtml', 'https://m.liepin.com/job/1920988285.shtml', 'https://m.liepin.com/job/1919955237.shtml', 'https://m.liepin.com/a/19561009.shtml', 'https://m.liepin.com/a/19548311.shtml', 'https://m.liepin.com/a/19483901.shtml', 'https://m.liepin.com/a/19439035.shtml', 'https://m.liepin.com/a/19432331.shtml', 'https://m.liepin.com/a/19427061.shtml', 'https://m.liepin.com/a/19399851.shtml', 'https://m.liepin.com/a/19371747.shtml', 'https://m.liepin.com/a/19370965.shtml', 'https://m.liepin.com/a/19250159.shtml', 'https://m.liepin.com/a/19151205.shtml'], '公司URL': ['https://m.liepin.com/company/8972310/', 'https://m.liepin.com/company/8025674/', 'https://m.liepin.com/company/8695948/', 'https://m.liepin.com/company/9238204/', 'https://m.liepin.com/company/9256869/', 'https://m.liepin.com/company/7889168/', 'https://m.liepin.com/company/9989029/', '', '', '', 'https://m.liepin.com/company/9284656/', 'https://m.liepin.com/company/8392675/', 'https://m.liepin.com/company/9647941/', 'https://m.liepin.com/company/9647941/', 'https://m.liepin.com/company/5493174/', 'https://m.liepin.com/company/8973053/', 'https://m.liepin.com/company/582047/', 'https://m.liepin.com/company/12191983/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/7983148/', 'https://m.liepin.com/company/10166945/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/8836020/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/9947855/', 
'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/10012691/', 'https://m.liepin.com/company/5964833/', 'https://m.liepin.com/company/12166375/', 'https://m.liepin.com/company/10063493/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/7999640/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/10087541/', 'https://m.liepin.com/company/7876336/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/2115085/', 'https://m.liepin.com/company/10156263/', 'https://m.liepin.com/company/10118191/', 'https://m.liepin.com/company/9947855/', 'https://m.liepin.com/company/10156263/', 'https://m.liepin.com/company/8845617/', 'https://m.liepin.com/company/9412628/', 'https://m.liepin.com/company/9220328/', '', '', '', '', '', '', '', '', '', '', '']}\n"
     ]
    }
   ],
   "source": [
    "print(数据字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<Element 'div' class=('search-conditions',) data-selector='search-conditions'>]\n",
      "<Element 'div' class=('search-conditions',) data-selector='search-conditions'>\n",
      "[<Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>, <Element 'dt' class=('search-title',)>]\n",
      "公司：\n",
      "行业：\n",
      "城市：\n",
      "薪资：\n",
      "更多：\n",
      "<Element 'dd' class=('comp-list',)>\n",
      "<Element 'dd' class=('short-dd', 'select-industry') data-param='industries'>\n",
      "<Element 'dd' data-param='city'>\n",
      "<Element 'dd' data-param='salary'>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-time')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-jobkind')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-compscale')>\n",
      "<Element 'dd' class=('dropdown', 'dropdown-compkind')>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'中国500强': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=155&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049',\n",
       " '2018互联网300强': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=182&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049',\n",
       " '制造业500强': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=186&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049',\n",
       " 'AI创新成长50强 ': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=189&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049',\n",
       " '独角兽': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=130&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049',\n",
       " '上市公司': '/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=156&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049'}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "主要元素 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')\n",
    "# 预期是一个元素的列表？\n",
    "print (主要元素)\n",
    "print (主要元素[0])\n",
    "print (主要元素[0].xpath('//dt[@class=\"search-title\"]'))\n",
    "\n",
    "list_search_title = 主要元素[0].xpath('//dt[@class=\"search-title\"]')\n",
    "for x in list_search_title:\n",
    "    print (x.text)\n",
    "    \n",
    "list_search_dd = 主要元素[0].xpath('//dt[@class=\"search-title\"]/following-sibling::dd')\n",
    "for x in list_search_dd:\n",
    "    print (x)  \n",
    "    \n",
    "\n",
    "公司数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[0] \\\n",
    "                    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a/@href')\n",
    "               \n",
    "公司数据选择器链结\n",
    "\n",
    "# 但我们需要知道这些选择器链结, 对映到什麽数据\n",
    "公司数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[0] \\\n",
    "                    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a')\n",
    "公司数据选择器链结\n",
    "\n",
    "#[ x.xpath(\"a/@href\")[0] for x in 公司数据选择器链结]\n",
    "#[ x.xpath(\"a/text()\")[0] for x in 公司数据选择器链结]\n",
    "公司数据选择器链结 = { x.xpath(\"a/text()\")[0]:x.xpath(\"a/@href\")[0] for x in 公司数据选择器链结}\n",
    "公司数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=155&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=182&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=186&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=189&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=130&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment=''),\n",
       " ParseResult(scheme='', netloc='', path='/zhaopin/', params='', query='init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=PRD&compTag=156&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049', fragment='')]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from urllib.parse import urlparse, parse_qs\n",
    "\n",
    "# Split every tag href into its URL components.\n",
    "list(map(urlparse, 公司数据选择器链结.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 6 entries, 0 to 5\n",
      "Data columns (total 6 columns):\n",
      " #   Column    Non-Null Count  Dtype \n",
      "---  ------    --------------  ----- \n",
      " 0   scheme    6 non-null      object\n",
      " 1   netloc    6 non-null      object\n",
      " 2   path      6 non-null      object\n",
      " 3   params    6 non-null      object\n",
      " 4   query     6 non-null      object\n",
      " 5   fragment  6 non-null      object\n",
      "dtypes: object(6)\n",
      "memory usage: 416.0+ bytes\n",
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       6\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=58d828c357a8cb19&amp;flushckid=1&amp;...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=58d828c357a8cb19&flushckid=1&...           "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# One row per tag link, built from the urlparse() result of each href.\n",
    "df = pd.DataFrame(list(map(urlparse, 公司数据选择器链结.values())))\n",
    "df.info()\n",
    "# Number of distinct values in each column.\n",
    "print(df.nunique())\n",
    "df.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init             1\n",
      "headckid         1\n",
      "flushckid        1\n",
      "fromSearchBtn    1\n",
      "keyword          1\n",
      "compTag          6\n",
      "ckid             1\n",
      "siTag            1\n",
      "d_sfrom          1\n",
      "d_ckId           1\n",
      "d_curPage        1\n",
      "d_pageSize       1\n",
      "d_headId         1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>keyword</th>\n",
       "      <th>compTag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>PRD</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>PRD</td>\n",
       "      <td>182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>PRD</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>PRD</td>\n",
       "      <td>189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PRD</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>PRD</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  keyword compTag\n",
       "0     PRD     155\n",
       "1     PRD     182\n",
       "2     PRD     186\n",
       "3     PRD     189\n",
       "4     PRD     130\n",
       "5     PRD     156"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten each query string into {parameter: first value}; parse_qs returns a list per key.\n",
    "df_qs = pd.DataFrame(\n",
    "    [{key: values[0] for key, values in parse_qs(query).items()} for query in df['query']]\n",
    ")\n",
    "# Number of distinct values per query parameter.\n",
    "print (df_qs.nunique())\n",
    "# Only the last expression of a cell is displayed, so show the two columns of interest here.\n",
    "df_qs[['keyword','compTag']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '155', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}, {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '182', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}, {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '186', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}, {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '189', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}, {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '130', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}, {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '156', 'ckid': 
'58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}] {'init': '-1', 'headckid': '58d828c357a8cb19', 'flushckid': '1', 'fromSearchBtn': '2', 'keyword': 'PRD', 'compTag': '156', 'ckid': '58d828c357a8cb19', 'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw', 'd_sfrom': 'search_unknown', 'd_ckId': '6aa779111c1b4ca77cff3648d9dee049', 'd_curPage': '0', 'd_pageSize': '40', 'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"python 原代碼以下不處理\\ndf_qs = pd.DataFrame(list_query)\\nprint (df_qs.nunique())\\ndf_qs.head()\\ndf_qs[['keyword','compTag']]\\n\""
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Outer step of the comprehension, written as a loop: one flattened dict per query string.\n",
    "列表暂存 = list()\n",
    "for x in df['query']:\n",
    "    列表暂存.append({k: v[0] for k, v in parse_qs(x).items()})\n",
    "\n",
    "# Inner step, written as a loop. `x` still holds the last query string from the loop above,\n",
    "# so this builds the dict for that last entry only.\n",
    "字典暂存 = {}\n",
    "for k, v in parse_qs(x).items():     # for key, value in dictionary.items():\n",
    "    字典暂存[k] = v[0]\n",
    "\n",
    "# ----------------------------------------------\n",
    "print (列表暂存, 字典暂存)\n",
    "\n",
    "'''python 原代碼以下不處理\n",
    "df_qs = pd.DataFrame(list_query)\n",
    "print (df_qs.nunique())\n",
    "df_qs.head()\n",
    "df_qs[['keyword','compTag']]\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '155',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'},\n",
       " {'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '182',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'},\n",
       " {'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '186',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'},\n",
       " {'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '189',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'},\n",
       " {'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '130',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'},\n",
       " {'init': '-1',\n",
       "  'headckid': '58d828c357a8cb19',\n",
       "  'flushckid': '1',\n",
       "  'fromSearchBtn': '2',\n",
       "  'keyword': 'PRD',\n",
       "  'compTag': '156',\n",
       "  'ckid': '58d828c357a8cb19',\n",
       "  'siTag': '1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw',\n",
       "  'd_sfrom': 'search_unknown',\n",
       "  'd_ckId': '6aa779111c1b4ca77cff3648d9dee049',\n",
       "  'd_curPage': '0',\n",
       "  'd_pageSize': '40',\n",
       "  'd_headId': '6aa779111c1b4ca77cff3648d9dee049'}]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fully unrolled version: outer loop over query strings, inner loop over their parameters.\n",
    "列表暫存 = list()\n",
    "for q in df['query']:\n",
    "    字典暫存 = {}\n",
    "    for k, v in parse_qs(q).items():   # for key, value in dictionary.items():\n",
    "        # parse_qs gives a list per key; keep its first value.\n",
    "        字典暫存[k] = v[0]\n",
    "    列表暫存.append(字典暫存)\n",
    "列表暫存\n",
    "# ----------------------------------------------\n",
    "# For comparison, the same result as a single comprehension:\n",
    "# 列表暫存 = [{k:v[0] for k,v in parse_qs(q).items()} for q in df['query'] ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['PRD'], 'compTag': ['155'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}\n",
      "{'中国500强': '155', '2018互联网300强': '182', '制造业500强': '186', 'AI创新成长50强 ': '189', '独角兽': '130', '上市公司': '156'}\n"
     ]
    }
   ],
   "source": [
    "def parse_url_qs_for_compTag (url):\n",
    "    \"\"\"Parse the query string of `url` into a dict.\n",
    "\n",
    "    Despite the name, every query parameter is returned, not only `compTag`.\n",
    "    Each value is a list of strings, as produced by `urllib.parse.parse_qs`.\n",
    "    \"\"\"\n",
    "    return parse_qs(urlparse(url).query)\n",
    "\n",
    "# Use the parameters of the first tag link as the template for later requests.\n",
    "参数模板 = parse_url_qs_for_compTag(list(公司数据选择器链结.values())[0])\n",
    "print(参数模板)\n",
    "\n",
    "# Map each tag label to its compTag id; [0] takes the single value out of the parsed list.\n",
    "字典_compTag = { k:parse_url_qs_for_compTag(v)['compTag'][0] for k,v in 公司数据选择器链结.items()}\n",
    "print (字典_compTag)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'中国500强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['155'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '2018互联网300强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['182'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '制造业500强': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['186'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, 'AI创新成长50强 ': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['189'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '独角兽': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['130'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': 
['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}, '上市公司': {'init': ['-1'], 'headckid': ['58d828c357a8cb19'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['用户体验'], 'compTag': ['156'], 'ckid': ['58d828c357a8cb19'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['6aa779111c1b4ca77cff3648d9dee049'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['6aa779111c1b4ca77cff3648d9dee049']}}\n"
     ]
    }
   ],
   "source": [
    "def 参数模板生成(compTag , keyword ):\n",
    "    \"\"\"Return a copy of 参数模板 whose 'compTag' and 'keyword' entries are replaced.\"\"\"\n",
    "    return {**参数模板, 'compTag': compTag, 'keyword': keyword}\n",
    "\n",
    "# One parameter dict per tag label; values are wrapped in lists like the other template entries.\n",
    "参数_compTag_用户体验 = {\n",
    "    label: 参数模板生成(compTag = [tag], keyword = ['用户体验'])\n",
    "    for label, tag in 字典_compTag.items()\n",
    "}\n",
    "print(参数_compTag_用户体验)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://www.liepin.com/zhaopin/?init=-1&headckid=58d828c357a8cb19&flushckid=1&fromSearchBtn=2&keyword=%E7%94%A8%E6%88%B7%E4%BD%93%E9%AA%8C&compTag=155&ckid=58d828c357a8cb19&siTag=1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=6aa779111c1b4ca77cff3648d9dee049&d_curPage=0&d_pageSize=40&d_headId=6aa779111c1b4ca77cff3648d9dee049'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Request the search page for the 中国500强 tag.\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "payload = 参数_compTag_用户体验['中国500强']\n",
    "\n",
    "session = HTMLSession()\n",
    "r = session.get(url, params=payload)\n",
    "# Display the final request URL, including the encoded query string.\n",
    "r.url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "session = HTMLSession()\n",
    "\n",
    "def requests_liepin( url, params):\n",
    "    \"\"\"Fetch one liepin search page and return its job listings as a DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url : str\n",
    "        Address passed to ``session.get``.\n",
    "    params : dict\n",
    "        Query-string parameters passed to ``session.get``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per ``<li>`` under ``ul.sojob-list`` and one column per key of the\n",
    "        XPath tables in ``dict_xpaths``.\n",
    "    \"\"\"\n",
    "    # Use the caller's `params`. Reading the notebook-global `payload` here would\n",
    "    # silently ignore the argument.\n",
    "    r = session.get(url, params=params)\n",
    "\n",
    "    # Every job listing on the page; all XPaths below are evaluated per listing.\n",
    "    主要元素 = r.html.xpath('//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # Column name -> XPath, grouped by how the value is extracted.\n",
    "    dict_xpaths = {\n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]',\n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title',\n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # Full text content of the first match in each listing.\n",
    "        return [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # A match is either a plain string (e.g. an @title attribute) or an element;\n",
    "        # both are stripped and concatenated per listing.\n",
    "        return [\n",
    "            \"\".join(x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_))\n",
    "            for e in 主要元素\n",
    "        ]\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # First absolute link of the first match, or \"\" when the listing has no match\n",
    "        # or the match carries no link.\n",
    "        暂存结果 = []\n",
    "        for e in 主要元素:\n",
    "            hit = e.xpath(_xpath_, first=True)\n",
    "            links = list(hit.absolute_links) if hit is not None else []\n",
    "            暂存结果.append(links[0] if links else \"\")\n",
    "        return 暂存结果\n",
    "\n",
    "    # Extract each column relative to the listing elements only.\n",
    "    数据字典 = {k: get_e_text_content(v) for k, v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k: get_e_text(v) for k, v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k: get_e_href(v) for k, v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    return 数据\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>热门公司类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年03月31日</td>\n",
       "      <td>大客户销售经理-北京-网易严选</td>\n",
       "      <td>北京-五道口</td>\n",
       "      <td>网易集团</td>\n",
       "      <td>https://www.liepin.com/job/1926756751.shtml</td>\n",
       "      <td>https://www.liepin.com/company/5964833/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>3年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年03月30日</td>\n",
       "      <td>阿里云智能事业群-数据技术专家(金融行业)-北京/杭州</td>\n",
       "      <td>杭州</td>\n",
       "      <td>阿里巴巴</td>\n",
       "      <td>https://www.liepin.com/job/1927063431.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1072424/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年03月27日</td>\n",
       "      <td>钉钉(Dingtalk)-搜索中心-Java开发技术专家</td>\n",
       "      <td>杭州</td>\n",
       "      <td>阿里巴巴</td>\n",
       "      <td>https://www.liepin.com/job/1926996383.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1072424/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>大专及以上</td>\n",
       "      <td>2年以上</td>\n",
       "      <td>6-8k·13薪</td>\n",
       "      <td>2020年03月25日</td>\n",
       "      <td>员工关系专员</td>\n",
       "      <td>廊坊-广阳区</td>\n",
       "      <td>中国国际技术智力合作有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1926938099.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1233751/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3年以上</td>\n",
       "      <td>25-50k·12薪</td>\n",
       "      <td>2020年03月24日</td>\n",
       "      <td>钉钉(DingTalk)-安全运营专家-安全产品及中心</td>\n",
       "      <td>杭州</td>\n",
       "      <td>阿里巴巴</td>\n",
       "      <td>https://www.liepin.com/job/1926923363.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1072424/</td>\n",
       "      <td>中国500强</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1年以上</td>\n",
       "      <td>5-8k·12薪</td>\n",
       "      <td>2020年03月26日</td>\n",
       "      <td>片区人力资源主任/专员</td>\n",
       "      <td>中山</td>\n",
       "      <td>碧桂园智慧物业服务集团股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1919360705.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8694860/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>6年以上</td>\n",
       "      <td>15-20k·13薪</td>\n",
       "      <td>2020年03月26日</td>\n",
       "      <td>法务经理/主任</td>\n",
       "      <td>深圳</td>\n",
       "      <td>中国南玻集团股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1926955487.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9091167/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>统招本科</td>\n",
       "      <td>10年以上</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年03月26日</td>\n",
       "      <td>CHO/HRD</td>\n",
       "      <td>上海</td>\n",
       "      <td>银科控股</td>\n",
       "      <td>https://www.liepin.com/job/1915800458.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8582797/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3年以上</td>\n",
       "      <td>20-30k·12薪</td>\n",
       "      <td>2020年03月25日</td>\n",
       "      <td>SAP 运维顾问</td>\n",
       "      <td>北京</td>\n",
       "      <td>科兴</td>\n",
       "      <td>https://www.liepin.com/job/1926949105.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8593199/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1年以上</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>2020年03月25日</td>\n",
       "      <td>新闻短视频运营 (MJ000067)</td>\n",
       "      <td>北京</td>\n",
       "      <td>凤凰新媒体</td>\n",
       "      <td>https://www.liepin.com/job/1925965933.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8139695/</td>\n",
       "      <td>上市公司</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>240 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      edu     经验          薪水           时间                            职称  \\\n",
       "0   本科及以上   3年以上          面议  2020年03月31日               大客户销售经理-北京-网易严选   \n",
       "1    统招本科   3年以上          面议  2020年03月30日   阿里云智能事业群-数据技术专家(金融行业)-北京/杭州   \n",
       "2   本科及以上   3年以上          面议  2020年03月27日  钉钉(Dingtalk)-搜索中心-Java开发技术专家   \n",
       "3   大专及以上   2年以上    6-8k·13薪  2020年03月25日                        员工关系专员   \n",
       "4   本科及以上   3年以上  25-50k·12薪  2020年03月24日   钉钉(DingTalk)-安全运营专家-安全产品及中心   \n",
       "..    ...    ...         ...          ...                           ...   \n",
       "35  本科及以上   1年以上    5-8k·12薪  2020年03月26日                   片区人力资源主任/专员   \n",
       "36  本科及以上   6年以上  15-20k·13薪  2020年03月26日                       法务经理/主任   \n",
       "37   统招本科  10年以上          面议  2020年03月26日                       CHO/HRD   \n",
       "38  本科及以上   3年以上  20-30k·12薪  2020年03月25日                      SAP 运维顾问   \n",
       "39  本科及以上   1年以上  10-20k·12薪  2020年03月25日            新闻短视频运营 (MJ000067)   \n",
       "\n",
       "      公司地点               公司名称                                           链结  \\\n",
       "0   北京-五道口               网易集团  https://www.liepin.com/job/1926756751.shtml   \n",
       "1       杭州               阿里巴巴  https://www.liepin.com/job/1927063431.shtml   \n",
       "2       杭州               阿里巴巴  https://www.liepin.com/job/1926996383.shtml   \n",
       "3   廊坊-广阳区     中国国际技术智力合作有限公司  https://www.liepin.com/job/1926938099.shtml   \n",
       "4       杭州               阿里巴巴  https://www.liepin.com/job/1926923363.shtml   \n",
       "..     ...                ...                                          ...   \n",
       "35      中山  碧桂园智慧物业服务集团股份有限公司  https://www.liepin.com/job/1919360705.shtml   \n",
       "36      深圳       中国南玻集团股份有限公司  https://www.liepin.com/job/1926955487.shtml   \n",
       "37      上海               银科控股  https://www.liepin.com/job/1915800458.shtml   \n",
       "38      北京                 科兴  https://www.liepin.com/job/1926949105.shtml   \n",
       "39      北京              凤凰新媒体  https://www.liepin.com/job/1925965933.shtml   \n",
       "\n",
       "                                      公司URL  热门公司类型  \n",
       "0   https://www.liepin.com/company/5964833/  中国500强  \n",
       "1   https://www.liepin.com/company/1072424/  中国500强  \n",
       "2   https://www.liepin.com/company/1072424/  中国500强  \n",
       "3   https://www.liepin.com/company/1233751/  中国500强  \n",
       "4   https://www.liepin.com/company/1072424/  中国500强  \n",
       "..                                      ...     ...  \n",
       "35  https://www.liepin.com/company/8694860/    上市公司  \n",
       "36  https://www.liepin.com/company/9091167/    上市公司  \n",
       "37  https://www.liepin.com/company/8582797/    上市公司  \n",
       "38  https://www.liepin.com/company/8593199/    上市公司  \n",
       "39  https://www.liepin.com/company/8139695/    上市公司  \n",
       "\n",
       "[240 rows x 10 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_compTag_用户体验.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (热门公司类型 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_all.to_excel(\"20春_Web数据挖掘_week03_liepin_各热门公司类型.xlsx\", sheet_name=\"搜查结果\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "edu         5\n",
      "经验         10\n",
      "薪水         74\n",
      "时间         31\n",
      "职称        183\n",
      "公司地点       80\n",
      "公司名称       59\n",
      "链结        199\n",
      "公司URL      59\n",
      "热门公司类型      6\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>科大讯飞</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>硕士及以上</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>柳工机械</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华峰集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">江南布衣</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>龙信集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            职称\n",
       "公司名称 edu      \n",
       "华为   统招本科   30\n",
       "腾讯   本科及以上  29\n",
       "华为   本科及以上  16\n",
       "科大讯飞 本科及以上  14\n",
       "华为   硕士及以上   6\n",
       "...         ..\n",
       "柳工机械 统招本科    1\n",
       "华峰集团 本科及以上   1\n",
       "江南布衣 大专及以上   1\n",
       "     学历不限    1\n",
       "龙信集团 统招本科    1\n",
       "\n",
       "[80 rows x 1 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print (df_all.nunique())\n",
    "df_all[['edu']].drop_duplicates()\n",
    "\n",
    "df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>科大讯飞</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>硕士及以上</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>柳工机械</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华峰集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">江南布衣</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>龙信集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            职称\n",
       "公司名称 edu      \n",
       "华为   统招本科   30\n",
       "腾讯   本科及以上  29\n",
       "华为   本科及以上  16\n",
       "科大讯飞 本科及以上  14\n",
       "华为   硕士及以上   6\n",
       "...         ..\n",
       "柳工机械 统招本科    1\n",
       "华峰集团 本科及以上   1\n",
       "江南布衣 大专及以上   1\n",
       "     学历不限    1\n",
       "龙信集团 统招本科    1\n",
       "\n",
       "[80 rows x 1 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'r' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-1-84012d5349c4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;31m# 先取特定元素, 精准打击其子后辈\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0m主要元素\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mxpath\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'//div[@data-selector=\"search-conditions\"]'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;31m# 预期是一个元素的列表？\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mprint\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0m主要元素\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'r' is not defined"
     ]
    }
   ],
   "source": [
    "# 先取特定元素, 精准打击其子后辈\n",
    "主要元素 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')\n",
    "# 预期是一个元素的列表？\n",
    "print (主要元素)\n",
    "print (主要元素[0])\n",
    "print (主要元素[0].xpath('//dt[@class=\"search-title\"]'))\n",
    "\n",
    "list_search_title = 主要元素[0].xpath('//dt[@class=\"search-title\"]')\n",
    "for x in list_search_title:\n",
    "    print (x.text)\n",
    "    \n",
    "list_search_dd = 主要元素[0].xpath('//dt[@class=\"search-title\"]/following-sibling::dd')\n",
    "for x in list_search_dd:\n",
    "    print (x)  \n",
    "    \n",
    "\n",
    "公司数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[1] \\\n",
    "                    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a/@href')\n",
    "               \n",
    "公司数据选择器链结\n",
    "\n",
    "# 但我们需要知道这些选择器链结, 对映到什麽数据\n",
    "公司数据选择器链结 = r.html.xpath('//div[@data-selector=\"search-conditions\"]')[0] \\\n",
    "                    .xpath('//dt[@class=\"search-title\"]/following-sibling::dd')[1] \\\n",
    "                    .xpath('//div[contains(@class,\"hot-comp-tags\")]/a')\n",
    "公司数据选择器链结\n",
    "\n",
    "#[ x.xpath(\"a/@href\")[0] for x in 公司数据选择器链结]\n",
    "#[ x.xpath(\"a/text()\")[0] for x in 公司数据选择器链结]\n",
    "公司数据选择器链结 = { x.xpath(\"a/text()\")[0]:x.xpath(\"a/@href\")[0] for x in 公司数据选择器链结}\n",
    "公司数据选择器链结"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "749px",
    "left": "1125.609375px",
    "top": "110px",
    "width": "281.390625px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
